tutorials-and-examples/how-tos/Automation Setup - Configure Azure Machine Learning Compute Cluster and Managed Identity.ipynb (457 lines of code) (raw):

{ "cells": [ { "cell_type": "markdown", "source": [ "# Automation Setup - Configure Azure Machine Learning Compute Cluster and Managed Identity\r\n", "\r\n", "__Notebook Version:__ 1.0<br>\r\n", "__Python Version:__ Python 3.8 - AzureML<br>\r\n", "__Required Packages:__ No<br>\r\n", "__Platforms Supported:__ Azure Machine Learning Notebooks\r\n", " \r\n", "__Data Source Required:__ No \r\n", " \r\n", "### Description\r\n", "This is the first notebook in a series for setting up the Microsoft Sentinel notebook automation platform based on Azure Machine Learning Pipelines.<br>\r\n", "This notebook provides step-by-step instructions to create an Azure Machine Learning compute cluster and add a user-assigned managed identity to the compute cluster.<br>\r\n", "This AML compute cluster will provide the computing power for Sentinel notebook automation. It can be used for multiple automated notebooks.<br>\r\n", "Adding a user-assigned managed identity to the compute cluster enables scheduled notebooks to access the tenant's Azure resources.\r\n", "\r\n", "***Please run the cells sequentially to avoid errors. Please do not use \"run all cells\".***<br>\r\n", "\r\n", "## Table of Contents\r\n", "1. Warm-up\r\n", "2. Authentication to Azure Resources\r\n", "3. User-assigned Managed Identity\r\n", "4. Azure Machine Learning Compute Cluster" ], "metadata": { "nteract": { "transient": { "deleting": false } } } }, { "cell_type": "markdown", "source": [ "## 1. 
Warm-up" ], "metadata": { "nteract": { "transient": { "deleting": false } } } },
{ "cell_type": "code", "source": [
"# Azure Machine Learning and Pipeline SDK-specific imports\r\n",
"# azureml\r\n",
"import azureml.core\r\n",
"from azureml.core import Workspace, Experiment\r\n",
"from azureml.core.compute import AmlCompute, ComputeTarget\r\n",
"from azureml.core.datastore import Datastore\r\n",
"\r\n",
"# azure common/core\r\n",
"from azure.common.credentials import get_azure_cli_credentials\r\n",
"from azure.mgmt.resource import ResourceManagementClient\r\n",
"\r\n",
"# Python/ipython\r\n",
"import json\r\n",
"from IPython.display import display, HTML, Markdown\r\n",
"\r\n",
"print(\"SDK version:\", azureml.core.VERSION)" ],
"outputs": [], "execution_count": null, "metadata": { "gather": { "logged": 1641319923703 } } },
{ "cell_type": "code", "source": [
"# Functions will be used in this notebook\r\n",
"def read_config_values(file_path):\r\n",
"    \"\"\"Load the pre-generated Microsoft Sentinel workspace parameters.\r\n",
"\r\n",
"    Reads the JSON file at file_path and returns the tuple\r\n",
"    (tenant_id, subscription_id, resource_group, workspace_id,\r\n",
"    workspace_name, user_alias, user_object_id).\r\n",
"    A key missing from the file raises KeyError.\r\n",
"    \"\"\"\r\n",
"    # An open file object is always truthy, so no separate check is needed before parsing.\r\n",
"    with open(file_path) as json_file:\r\n",
"        json_config = json.load(json_file)\r\n",
"    return (json_config[\"tenant_id\"],\r\n",
"            json_config[\"subscription_id\"],\r\n",
"            json_config[\"resource_group\"],\r\n",
"            json_config[\"workspace_id\"],\r\n",
"            json_config[\"workspace_name\"],\r\n",
"            json_config[\"user_alias\"],\r\n",
"            json_config[\"user_object_id\"])\r\n",
"\r\n",
"def has_valid_token():\r\n",
"    \"\"\"Check whether the Azure CLI currently holds a usable AAD token.\r\n",
"\r\n",
"    Returns True when a token can be retrieved. On failure, prints (or renders)\r\n",
"    a hint describing the next step and returns False. KeyboardInterrupt and\r\n",
"    SystemExit are not intercepted, so interrupting the kernel still works.\r\n",
"    \"\"\"\r\n",
"    try:\r\n",
"        credentials, _ = get_azure_cli_credentials()\r\n",
"        creds = credentials._get_cred(resource=None)\r\n",
"        # Only the attempt matters: retrieving the token raises when the login is missing or expired.\r\n",
"        creds._token_retriever()[2]\r\n",
"        print(\"Successfully signed in.\")\r\n",
"        return True\r\n",
"    except Exception as ex:\r\n",
"        error_text = str(ex)\r\n",
"        if \"Please run 'az login' to setup account\" in error_text:\r\n",
"            print(\"Please sign in first.\")\r\n",
"        elif \"AADSTS70043: The refresh token has expired\" in error_text:\r\n",
"            message = \"**The refresh token has expired. <br> Please continue your login process. Then: <br> 1. If you plan to run multiple notebooks on the same compute instance today, you may restart the compute instance by clicking 'Compute' on left menu, then select the instance, clicking 'Restart'; <br> 2. Otherwise, you may just restart the kernel from top menu. <br> Finally, close and re-load the notebook, then re-run cells one by one from the top.**\"\r\n",
"            display(Markdown(message))\r\n",
"        elif \"[Errno 2] No such file or directory: '/home/azureuser/.azure/azureProfile.json'\" in error_text:\r\n",
"            print(\"Please sign in.\")\r\n",
"        else:\r\n",
"            print(error_text)\r\n",
"        return False" ],
"outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "gather": { "logged": 1641319929197 } } },
{ "cell_type": "code", "source": [
"# Calling the above function to populate Microsoft Sentinel workspace parameters\r\n",
"# The file, config.json, was generated by the system; however, you may modify the values, or manually set the variables\r\n",
"tenant_id, subscription_id, resource_group, workspace_id, workspace_name, user_alias, user_object_id = read_config_values('config.json')\r\n",
"print(\"Subscription Id: \" + subscription_id)" ],
"outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "gather": { "logged": 1641335902117 } } },
{ "cell_type": "markdown", "source": [ "## 2. 
Authentication to Azure Resources" ], "metadata": { "nteract": { "transient": { "deleting": false } } } },
{ "cell_type": "code", "source": [
"# The Azure CLI device-code flow is used to sign in: copy the code it prints and open the DeviceLogin site.\r\n",
"# The sign-in targets the tenant loaded from config.json; drop [--tenant $tenant_id] to use your default tenant.\r\n",
"if not has_valid_token():\r\n",
"    # ANSI escape 42 switches the output background to green so the device code stands out.\r\n",
"    !echo -e '\\e[42m'\r\n",
"    !az login --tenant $tenant_id --use-device-code" ],
"outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "gather": { "logged": 1641319938155 } } },
{ "cell_type": "markdown", "source": [ "## 3. User-assigned Managed Identity " ], "metadata": { "nteract": { "transient": { "deleting": false } } } },
{ "cell_type": "code", "source": [
"# 1. Please enter the name of the Azure resource group in which you want to create a user-assigned managed identity\r\n",
"# Note: this replaces the resource_group value loaded from config.json.\r\n",
"resource_group = 'myresourcegroup'" ],
"outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "gather": { "logged": 1641335957503 } } },
{ "cell_type": "code", "source": [
"# 2. Please enter the name of an existing user-assigned managed identity, or a name for a new one to be created\r\n",
"user_assigned_managed_identity = 'myuai2022'" ],
"outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "gather": { "logged": 1641335960633 } } },
{ "cell_type": "code", "source": [ "# 3. 
Create a new user assigned managed identity if it doesn't exist\r\n",
"# Ask the CLI for the bare identity names (one per line) so the check is an exact, case-insensitive match.\r\n",
"# A substring search would report 'myuai' as existing whenever 'myuai2022' is present and skip the creation.\r\n",
"existing_identity_names = !az identity list --subscription $subscription_id -g $resource_group --query \"[].name\" -o tsv\r\n",
"\r\n",
"wanted_identity_name = user_assigned_managed_identity.lower()\r\n",
"if any(name.strip().lower() == wanted_identity_name for name in existing_identity_names):\r\n",
"    print('Found existing user-assigned managed identity.')\r\n",
"else:\r\n",
"    print('Create a new user-assigned managed identity.')\r\n",
"    !az identity create --subscription $subscription_id -g $resource_group -n $user_assigned_managed_identity" ],
"outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "gather": { "logged": 1641335969833 } } },
{ "cell_type": "code", "source": [
"# 4. Grant the user-assigned managed identity Contributor access to the target resource (the resource group, by default)\r\n",
"# NEED TO RUN ONLY ONCE\r\n",
"# '-o tsv' returns the bare value, so parsing does not depend on the CLI's default output format or on stripping JSON quotes.\r\n",
"principal_id_raw = !az identity show --subscription $subscription_id -g $resource_group --name $user_assigned_managed_identity --query principalId -o tsv\r\n",
"uami_id_raw = !az identity show --subscription $subscription_id -g $resource_group --name $user_assigned_managed_identity --query id -o tsv\r\n",
"principal_id = principal_id_raw[0].strip()\r\n",
"uami_id = uami_id_raw[0].strip()\r\n",
"print(uami_id)\r\n",
"\r\n",
"target_resource_id = '/subscriptions/{0}/resourceGroups/{1}'.format(subscription_id, resource_group)\r\n",
"# Passing the object id and principal type skips the directory lookup that --assignee performs,\r\n",
"# which can fail for an identity that was created only moments ago.\r\n",
"!az role assignment create --assignee-object-id $principal_id --assignee-principal-type ServicePrincipal --role 'Contributor' --scope $target_resource_id" ],
"outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "gather": { "logged": 1641337288050 } } },
{ "cell_type": "markdown", "source": [ "## 4. 
Azure Machine Learning Compute Cluster" ], "metadata": { "nteract": { "transient": { "deleting": false } } } },
{ "cell_type": "code", "source": [
"# 1. Please enter the name of an existing compute cluster, or a name for a new one to be created\r\n",
"amlcompute_cluster_name = 'compcl2022'" ],
"outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "gather": { "logged": 1641336983503 } } },
{ "cell_type": "code", "source": [
"# 2. Get AML workspace\r\n",
"# Enter current AML workspace name\r\n",
"current_aml_workspace_name = 'auto2022'\r\n",
"ws = Workspace.get(name=current_aml_workspace_name, subscription_id=subscription_id, resource_group=resource_group)\r\n",
"print(ws)" ],
"outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "gather": { "logged": 1641336925324 } } },
{ "cell_type": "code", "source": [
"# 3. Reuse the compute cluster if it already exists in the workspace; otherwise create a new one.\r\n",
"# The user-assigned managed identity (uami_id) is attached only when a new cluster is created here;\r\n",
"# an existing cluster is returned unchanged.\r\n",
"cts = ws.compute_targets\r\n",
"found = amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute'\r\n",
"if found:\r\n",
"    print('Found existing compute target.')\r\n",
"    compute_target = cts[amlcompute_cluster_name]\r\n",
"else:\r\n",
"    print('Creating a new compute target...')\r\n",
"    # For GPU, use vm_size=\"STANDARD_NC6\"; vm_priority='lowpriority' is optional.\r\n",
"    provisioning_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_D2_V2\", max_nodes=4, identity_type=\"UserAssigned\", identity_id=[uami_id])\r\n",
"\r\n",
"    # Create the cluster. For a more detailed view of current AmlCompute status, use get_status().\r\n",
"    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\r\n",
"    # min_node_count=None waits for the cluster's own provisioned minimum (min_nodes is not set above).\r\n",
"    # Requesting 1 node on a cluster that may scale to zero would simply block until the timeout.\r\n",
"    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=10)" ],
"outputs": [], "execution_count": null, "metadata": { "jupyter": { "source_hidden": false, "outputs_hidden": false }, "nteract": { "transient": { "deleting": false } }, "gather": { "logged": 1641337998748 } } } ],
"metadata": { "kernelspec": { "name": "python3-azureml", "language": "python", "display_name": "Python 3.6 - AzureML" }, "language_info": { "name": "python", "version": "3.6.9", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "kernel_info": { "name": "python3-azureml" }, "microsoft": { "host": { "AzureML": { "notebookHasBeenCompleted": true } } }, "nteract": { "version": "nteract-front-end@1.0.0" } }, "nbformat": 4, "nbformat_minor": 0 }